## The basic files and libraries needed for most presentations
# creates the libraries and common-functions sections
read_chunk("../common/utility_functions.R")

# Load hard dependencies with library() so a missing package fails loudly;
# require() only returns FALSE and lets the script limp on until a later,
# harder-to-diagnose error.
library(ggplot2)   # for plots
library(lattice)   # nicer scatter plots
library(plyr)      # for processing data.frames
library(grid)      # contains the arrow function
library(biOps)     # for basic image processing
library(doMC)      # for parallel code
library(png)       # for reading png images
library(gridExtra) # for arranging several grid plots on one page
library(reshape2)  # for the melt function
## To install EBImage
# source("http://bioconductor.org/biocLite.R")
# biocLite("EBImage")
library(EBImage)   # for more image processing
used.libraries <- c("ggplot2", "lattice", "plyr", "reshape2", "grid",
                    "gridExtra", "biOps", "png", "EBImage")

# start parallel environment
registerDoMC()
# functions for converting images back and forth

# Convert a matrix image into a long-format data.frame with one row per
# pixel: columns x (row index), y (column index) and val (intensity).
im.to.df <- function(in.img) {
  # x varies fastest in expand.grid, matching R's column-major flattening
  pix <- expand.grid(x = seq_len(nrow(in.img)),
                     y = seq_len(ncol(in.img)))
  pix$val <- as.vector(in.img)
  pix
}
# Convert a long-format pixel data.frame (as produced by im.to.df) back
# into a grey-valued matrix image.
#
# in.df:   data.frame with columns x, y and the value column
# val.col: name of the value column (default "val")
# inv:     if TRUE, invert intensities as 255 - val
df.to.im <- function(in.df, val.col = "val", inv = FALSE) {
  in.vals <- in.df[[val.col]]
  # Logical masks become 0/255 grey levels. is.logical() replaces the old
  # class(in.vals[1]) == "logical" check, which errors on zero-length input
  # and is non-idiomatic (inherits/is.* are the robust forms).
  if (is.logical(in.vals)) in.vals <- as.integer(in.vals * 255)
  if (inv) in.vals <- 255 - in.vals
  # assumes rows are ordered x-fastest, as im.to.df produces them
  out.mat <- matrix(in.vals,
                    nrow = length(unique(in.df$x)),
                    byrow = FALSE)
  attr(out.mat, "type") <- "grey"
  out.mat
}
# Run ddply() and then post-process the first `cols` grouping columns,
# which are assumed to come from cut_interval()-style calls: interval
# labels like "(1.2,3.4]" are replaced by their numeric midpoints and the
# "cut_...(" wrapper is stripped from the column names.
ddply.cutcols<-function(...,cols=1) {
  # run standard ddply command 
  cur.table<-ddply(...)
  # "(a,b]" -> (a + b) / 2: strip the bracket characters, split on the
  # comma and average the two endpoints
  cutlabel.fixer<-function(oVal) {
    sapply(oVal,function(x) {
      cnv<-as.character(x)
      mean(as.numeric(strsplit(substr(cnv,2,nchar(cnv)-1),",")[[1]]))
    })
  }
  # e.g. "cut_interval(utility, 7)" -> "utility": drop everything up to
  # the first "(" and everything from the last "," onward
  cutname.fixer<-function(c.str) {
    s.str<-strsplit(c.str,"(",fixed=T)[[1]]
    t.str<-strsplit(paste(s.str[c(2:length(s.str))],collapse="("),",")[[1]]
    # NOTE(review): c(1:length(t.str)-1) parses as (1:length(t.str)) - 1,
    # i.e. 0:(n-1); the 0 index is silently dropped so this keeps elements
    # 1..n-1. It works, but seq_len(length(t.str) - 1) would say so clearly.
    paste(t.str[c(1:length(t.str)-1)],collapse=",")
  }
  for(i in c(1:cols)) {
    cur.table[,i]<-cutlabel.fixer(cur.table[,i])
    names(cur.table)[i]<-cutname.fixer(names(cur.table)[i])
  }
  cur.table
}

# Display a list of png files as a grid of ggplot panels, one per file,
# each titled by title.fun(path). zoom scales the image within its panel
# (1 = fill the panel). Requires png, grid, ggplot2, plyr and gridExtra.
show.pngs.as.grid<-function(file.list,title.fun,zoom=1) {
  # read the png and wrap it as a raster grob sized to `zoom` of the panel
  preparePng<-function(x) rasterGrob(readPNG(x,native=T,info=T),width=unit(zoom,"npc"),interp=F)
  # a blank square plot used purely as a canvas for the image + its title
  labelPng<-function(x,title="junk") (qplot(1:300, 1:300, geom="blank",xlab=NULL,ylab=NULL,asp=1)+
                                        annotation_custom(preparePng(x))+
                                        labs(title=title)+theme_bw(24)+
                                        theme(axis.text.x = element_blank(),
                                              axis.text.y = element_blank()))
  imgList<-llply(file.list,function(x) labelPng(x,title.fun(x)) )
  # grid.arrange lays all the panels out on the current device
  do.call(grid.arrange,imgList)
}
## Standard image processing tools which I use for visualizing the examples in the script
# Per-label centers: for each distinct `val`, the unweighted centroid
# (xv, yv) and the intensity-weighted center of mass (xm, ym).
# Expects columns x, y, val and weight in in.df — TODO confirm callers
# always supply a weight column.
commean.fun<-function(in.df) {
  ddply(in.df,.(val), function(c.cell) {
    weight.sum<-sum(c.cell$weight)
    data.frame(xv=mean(c.cell$x),
               yv=mean(c.cell$y),
               xm=with(c.cell,sum(x*weight)/weight.sum),
               ym=with(c.cell,sum(y*weight)/weight.sum)
    )
  })
}

# Column means returned as a one-row data.frame (instead of the named
# numeric vector colMeans() gives), so the result cbind()s cleanly.
colMeans.df <- function(x, ...) {
  as.data.frame(t(colMeans(x, ...)))
}

# Per-label PCA of the pixel positions: for each distinct `val`, returns
# the centroid plus the eigen-decomposition of the (x, y) covariance —
# two rows per label, one per principal axis.
pca.fun<-function(in.df) {
  ddply(in.df,.(val), function(c.cell) {
    c.cell.cov<-cov(c.cell[,c("x","y")])
    c.cell.eigen<-eigen(c.cell.cov)
    
    c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
    # vx/vy: eigenvector components, vw: axis length (sqrt of eigenvalue),
    # th.off: orientation of each axis from atan2
    out.df<-cbind(c.cell.mean,
                  data.frame(vx=c.cell.eigen$vectors[1,],
                             vy=c.cell.eigen$vectors[2,],
                             vw=sqrt(c.cell.eigen$values),
                             th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,]))
    )
  })
}
# Turn the two principal axes produced by pca.fun into ellipse outlines
# (one ellipse per label) suitable for geom_path plotting.
vec.to.ellipse<-function(pca.df) {
  ddply(pca.df,.(val),function(cur.pca) {
    # assume there are two vectors now
    # sqrt(5) scales the axes up from the standard deviations — presumably
    # to make the ellipse cover most of the point spread; TODO confirm the
    # intended coverage factor
    create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"],
                          b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"],
                          th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]),
                          x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"])
  })
}

# test function for ellipse generation
# ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal()

# Sample `pts` points along an ellipse with semi-axes a and b, rotated by
# th.off and centered at (x.off, y.off). Extra named arguments in ... are
# attached as constant columns of the returned data.frame.
create.ellipse.points <- function(x.off = 0, y.off = 0, a = 1, b = NULL,
                                  th.off = 0, th.max = 2 * pi, pts = 36, ...) {
  # circle case: default the second axis to the first
  if (is.null(b)) {
    b <- a
  }
  th <- seq(0, th.max, length.out = pts)
  cs <- cos(th.off)
  sn <- sin(th.off)
  # parametric ellipse rotated by th.off, then translated to the offset
  data.frame(x = a * cs * cos(th) + b * sn * sin(th) + x.off,
             y = b * cs * sin(th) - a * sn * cos(th) + y.off,
             id = as.factor(paste(x.off, y.off, a, b, th.off, pts, sep = ":")),
             ...)
}
# Draw the ellipse described by the first row of c.box, which carries
# the center (x, y), semi-axes (a, b), rotation (th) and a color (col).
deform.ellipse.draw <- function(c.box) {
  with(c.box,
       create.ellipse.points(x.off = x[1], y.off = y[1],
                             a = a[1], b = b[1],
                             th.off = th[1],
                             col = col[1]))
}
# Per-label bounding box: for each distinct `val`, the centroid plus a
# closed 5-point rectangle (xi, yi) around the pixels and its width and
# height. Box edges sit at pixel edges via emin/emax (center +/- 0.5).
bbox.fun<-function(in.df) {
  ddply(in.df,.(val), function(c.cell) {
    c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
    xmn<-emin(c.cell$x)
    xmx<-emax(c.cell$x)
    ymn<-emin(c.cell$y)
    ymx<-emax(c.cell$y)
    # five corner points so geom_path draws a closed rectangle
    out.df<-cbind(c.cell.mean,
                  data.frame(xi=c(xmn,xmn,xmx,xmx,xmn),
                             yi=c(ymn,ymx,ymx,ymn,ymn),
                             xw=xmx-xmn,
                             yw=ymx-ymn
                  ))
  })
}

# since the edge of the pixel is 0.5 away from the middle of the pixel
# (pixel centers are integer coordinates; a pixel spans center +/- 0.5)
emin <- function(...) {
  min(...) - 0.5
}
emax <- function(...) {
  max(...) + 0.5
}
# Per-label extent cross-hairs: two rows per distinct `val` — the first
# spans the full y extent at the centroid x (vertical segment), the
# second the full x extent at the centroid y (horizontal segment).
extents.fun<-function(in.df) {
  ddply(in.df,.(val), function(c.cell) {
    c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
    # row 1: x pinned to centroid, y from emin..emax
    # row 2: y pinned to centroid, x from emin..emax
    out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)),
                                         xmax=c(c.cell.mean$x,emax(c.cell$x)),
                                         ymin=c(emin(c.cell$y),c.cell.mean$y),
                                         ymax=c(emax(c.cell$y),c.cell.mean$y)))
  })
}

common.image.path <- "../common"
# Helpers that build paths to the shared presentation assets:
# figures live under ../common/figures, data under ../common/data.
qbi.file <- function(file.name) {
  file.path(common.image.path, "figures", file.name)
}
qbi.data <- function(file.name) {
  file.path(common.image.path, "data", file.name)
}

# Shared ggplot2 fill scale: a 10-step rainbow colormap clamped to
# [0, max.val], so several panels can share one color mapping.
th_fillmap.fn<-function(max.val) scale_fill_gradientn(colours=rainbow(10),limits=c(0,max.val))

Quantitative Big Imaging

author: Kevin Mader, Christian Dietz date: 19 February 2015 width: 1440 height: 900 css: ../common/template.css transition: rotate

ETHZ: 227-0966-00L

Introductions and Workflows

Course Outline

source('../common/schedule.R')

Who are we?

Kevin Mader


Marco Stampanoni

Who are we (continued)?

Anders Kaestner

Who are we (continued)?

Filippo Arcadu


Christian Dietz

Who are you?

A wide spectrum of backgrounds

A wide range of skills


So how will this ever work?

Adaptive assignments

  1. Conceptual, graphical assignments with practical examples
  2. Opportunities to create custom implementations, plugins, and perform more complicated analysis on larger datasets if interested

Course Expectations

Exercises


Science Project

Literature / Useful References

General Material


Today

Motivation

Crazy Workflow

Motivation (Why does this class exist?)


X-Ray

Optical

Toys

Motivation (Is it getting better?)

  1. Experimental Design finding the right technique, picking the right dyes and samples has stayed relatively consistent; better techniques lead to more demanding scientists.

  2. Management storing, backing up, setting up databases, these processes have become easier and more automated as data magnitudes have increased

  3. Measurements the actual acquisition speed of the data has increased wildly due to better detectors, parallel measurement, and new higher intensity sources

  4. Post Processing this portion is the most time-consuming and difficult and has seen minimal improvements over the last years


library("ggthemes")
# guesstimates
# Rough estimates of hours spent per project phase, by year
time.data<-data.frame(
  year=c(2000,2008,2014,2020),
  "Experiment Design"=c(10,10,8,8),
  "Measurements"=c(50,5,0.5,0.1),
  "Management"=c(20,15,10,8),
  "Post Processing"=c(50,50,50,50)
  )
# melt to long form, then normalize each year's times to percentages
# so the stacked bars all sum to 100
mtd<-ddply(melt(time.data,id.vars="year"),.(year),
           function(x) cbind(x,sum.val=sum(x$value),norm.val=100*x$value/sum(x$value))
           )
# data.frame() turned the spaces in the column names into dots;
# restore them for the legend labels
mtd$variable<-gsub("[.]"," ",mtd$variable)
ggplot(mtd,aes(x=as.factor(year),y=norm.val,fill=variable))+
  geom_bar(stat="identity")+
  labs(x="Year",y="Relative Time",fill="",title="Experimental Time Breakdown")+
  theme_economist(20)
plot of chunk time-figure

Saturating Output

# Throughput per year implied by the time.data estimates: measurements
# bounded by design + measurement time, publications by the sum of all
# four phases (365*24 = hours in a year)
output.df<-data.frame(Year=time.data$year,
                      Measurements=
                        365*24/(time.data$Experiment.Design+time.data$Measurements),
                           Publications=365*24/rowSums(time.data[,c(2:5)])
                           )
mtd2<-melt(output.df,id.vars="Year")
# sqrt y-scale keeps the early (small) years visible next to the later ones
ggplot(mtd2,aes(x=as.factor(Year),y=value,fill=variable))+
  geom_bar(stat="identity",position="dodge")+
  scale_y_sqrt()+
  labs(x="Year",y="Output",fill="")+
  theme_wsj(25)
plot of chunk scaling

kable(output.df,digits=0)
Year Measurements Publications
2000 146 67
2008 584 110
2014 1031 128
2020 1081 133

To put more real numbers on these scales rather than 'pseudo-publications', the time to measure a terabyte of data is shown in minutes.

mmtb<-data.frame(
  Year=c(2000,2008,2014,2016),
  y=1024/c(1/4,60/64,32,8*60)
  )
names(mmtb)[2]<-"Time to 1 TB in Minutes"
kable(mmtb,digits=0)
Year Time to 1 TB in Minutes
2000 4096
2008 1092
2014 32
2016 2

How much is a TB, really?

If you looked at one 1000 x 1000 sized image

image(255*matrix(runif(1000*1000),nrow=1000))
plot of chunk unnamed-chunk-4

every second, it would take you

# assuming 16 bit images and a 'metric' terabyte
time.per.tb<-1e12/(1000*1000*16/8) / (60*60)
cat("__",
  round(time.per.tb),
  "__",sep="")

139 hours to browse through a terabyte of data.


names(mmtb)[2]<-"Time to 1 TB"
mmtb$"Man power to keep up"<-time.per.tb*60/mmtb$"Time to 1 TB"
mmtb[,2]<-paste(round(mmtb[,2]),"min") # add minutes suffix
mmtb$"Salary Costs / Month"<-mmtb$"Man power to keep up"*12500
mmtb$"Man power to keep up"<-paste(round(mmtb$"Man power to keep up")," people")
mmtb$"Salary Costs / Month"<-paste(round(mmtb$"Salary Costs / Month"/1000)," kCHF")
kable(mmtb,digits=0)
Year Time to 1 TB Man power to keep up Salary Costs / Month
2000 4096 min 2 people 25 kCHF
2008 1092 min 8 people 95 kCHF
2014 32 min 260 people 3255 kCHF
2016 2 min 3906 people 48828 kCHF

Overwhelmed


cells in bone tissue

More overwhelmed


more samples

Kill me now


even more samples

It gets better


alignment

Dynamic Information


Computing has changed: Parallel

Moores Law

\[ \textrm{Transistors} \propto 2^{T/(\textrm{18 months})} \]

# stolen from https://gist.github.com/humberto-ortiz/de4b3a621602b78bf90d
moores.txt<-c("Id Name  Year  Count(1000s)  Clock(MHz)\n",
        "0            MOS65XX  1975           3.51           14\n",
        "1          Intel8086  1978          29.00           10\n",
        "2          MIPSR3000  1988         120.00           33\n",
        "3           AMDAm486  1993        1200.00           40\n",
        "4        NexGenNx586  1994        3500.00          111\n",
        "5          AMDAthlon  1999       37000.00         1400\n",
        "6   IntelPentiumIII  1999       44000.00         1400\n",
        "7         PowerPC970  2002       58000.00         2500\n",
        "8       AMDAthlon64  2003      243000.00         2800\n",
        "9    IntelCore2Duo  2006      410000.00         3330\n",
        "10         AMDPhenom  2007      450000.00         2600\n",
        "11      IntelCorei7  2008     1170000.00         3460\n",
        "12      IntelCorei5  2009      995000.00         3600")
moores.pt.text<-gsub("zZ","\n",gsub("\\s+",",",paste(moores.txt,collapse="zZ")))
moores.df<-read.csv(text=moores.pt.text)
names(moores.df)[c(4,5)]<-c("kTransistors","Speed")
moores.df<-moores.df[,c(2:5)]

moores.table<-melt(moores.df,
  id.vars=c("Year","Name"))
moores.law<-function(year) moores.df[1,"kTransistors"]*2^((year-moores.df[1,"Year"])/1.5)
moores.table$variable<-gsub("Speed","Speed (MHz)",moores.table$variable)
ggplot(moores.table,aes(Year,value,color=variable))+
  geom_line(aes(linetype="Moore's Law",y=moores.law(moores.table$Year)),color="black",
            size=2,alpha=0.8)+
  geom_jitter()+  
  geom_smooth()+

  scale_y_log10()+
  labs(color="",y="",linetype="")+
  theme_economist(20)
Based on trends from Wikipedia and Intel

Based on data from https://gist.github.com/humberto-ortiz/de4b3a621602b78bf90d


There are now many more transistors inside a single computer but the processing speed hasn't increased. How can this be?

Parallel Code is important

Computing has changed: Cloud

# Factory for a cloud-cost function: given the peak and spot hourly
# rates, returns cost(hours, peakRatio) that blends the two rates by
# the fraction of hours run at peak.
cloudCostGen <- function(cloudPeakCost, cloudSpotCost) {
  function(hours, peakRatio = 1) {
    peak.part <- hours * peakRatio * cloudPeakCost
    spot.part <- hours * (1 - peakRatio) * cloudSpotCost
    peak.part + spot.part
  }
}
# Factory for a workstation-cost function: a fixed purchase price plus
# energy consumption billed per hour of use.
localCostGen <- function(compCost, energyPriceStd, compPowerRaw) {
  energyPrice <- energyPriceStd / 1e6 # $ / Whr
  function(hours) {
    compCost + energyPrice * compPowerRaw * hours
  }
}

# Build a cost-comparison table over machine lifetimes (`years`, a vector
# of lifetimes in years): for each lifetime, 50 usage levels from idle to
# 24/7, with cloud cost at all-peak / all-spot / 50-50 vs the workstation.
make.res.table<-function(cloudCost,localCost,  years) {

  ldply(years,function(life) {
    # usage from 0 hours up to every hour of the lifetime
    timef<-seq(0,life*24*365,length.out=50)
    data.frame(life=life*12,hours=timef,
               hoursPerWeek=timef/(life*52),
               WorstCaseCloud=cloudCost(timef,1),
               BestCaseCloud=cloudCost(timef,0),
               MiddleCaseCloud=cloudCost(timef,0.5),
               LocalWorkstation=localCost(timef))
  })
}

# Monte-Carlo comparison of cloud vs workstation cost: draw n.guess
# random usage scenarios (utilization, service life, peak fraction) and
# evaluate both cost functions on each.
random.sim <- function(cloudCost, localCost, n.guess) {
  # draws kept in the original order so any seeded run reproduces exactly
  utility <- runif(n.guess, min = 0, max = 1)          # fraction of time in use
  serviceYears <- runif(n.guess, min = 1.5, max = 6.5) # machine lifetime
  peak <- runif(n.guess, min = 0, max = 1)             # fraction run at peak rate
  hours <- serviceYears * utility * 24 * 365
  sim <- data.frame(hours = hours,
                    years = serviceYears,
                    months = serviceYears * 12,
                    ryears = round(serviceYears),
                    utility = utility,
                    peak = peak,
                    clcost = cloudCost(hours, peak),
                    wscost = localCost(hours))
  # cloud cost as a percentage of the workstation cost
  sim$cloudPremium <- with(sim, clcost / wscost * 100)
  sim
}

# Summarize one scenario bin by its mean cloud and workstation costs.
scen.fcn <- function(c.pts) {
  data.frame(clcost = mean(c.pts$clcost),
             wscost = mean(c.pts$wscost))
}

# Bin the simulated scenarios on a 7 x 7 x 4 grid over utilization, peak
# fraction and lifetime (months), averaging the costs within each bin via
# scen.fcn, then add the cloud cost as a percentage of workstation cost.
scen.grid<-function(scen.table.raw) {
  ot<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,7),
                                     cut_interval(peak,7),
                                     cut_interval(months,4)),cols=3,
                    scen.fcn
  )
  ot$cloudPremium<-with(ot,clcost/wscost*100)
  ot
}

# For each (peak, months) bin, find the break-even point: the highest
# utilization at which the cloud is still no more expensive than the
# local workstation. Uses a finer 25-step utilization grid than scen.grid.
scen.table<-function(scen.table.raw) {
  ot<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,25),
                                     cut_interval(peak,5),
                                     cut_interval(months,5)),cols=3,
                    scen.fcn
  )
  ot$cloudPremium<-with(ot,clcost/wscost*100)
  
  # keep only bins where cloud <= workstation, then the max utilization one
  ddply(ot,.(peak,months),function(c.pts) {
    n.pts<-subset(c.pts,clcost<=wscost)
    subset(n.pts,utility==max(n.pts$utility))
  })
}

# Workstation purchase price:
# http://www.bestbuy.com/site/cybertronpc-desktop-intel-core-i7-64gb-memory-2tb-hdd-120gb-solid-state-drive-120gb-solid-state-drive-black-blue/6357048.p?id=1219209052767&skuId=6357048
compCost <- 3999.99
# Watts: http://www.eu-energystar.org/en/en_008.shtml#use
compPowerRaw <- 190
# Eu/KWHr: http://www.eu-energystar.org/en/en_008.shtml#use
energyPriceStd <- 0.143
# $ / Whr — NOTE(review): converts with /1000*1.2 here, while
# localCostGen above divides by 1e6; verify which scale is intended
energyPrice <- energyPriceStd / 1000 * 1.2
cloudPeakCost <- 0.780
cloudSpotCost <- 0.097

# Blended hourly cloud cost for a given fraction of peak-rate usage.
cloudCost <- function(hours, peakRatio = 1) {
  peak.part <- hours * peakRatio * cloudPeakCost
  spot.part <- hours * (1 - peakRatio) * cloudSpotCost
  peak.part + spot.part
}
# Purchase price plus energy cost for `hours` of workstation use.
localCost <- function(hours) {
  compCost + energyPrice * compPowerRaw * hours
}

http://www-inst.eecs.berkeley.edu/~cs61c/sp14/ “The Case for Energy-Proportional Computing,” Luiz André Barroso, Urs Hölzle, IEEE Computer, December 2007

cloud services


Cloud Computing Costs

The figure shows the range of cloud costs (determined by peak usage) compared to a local workstation with utilization shown as the average number of hours the computer is used each week.

# Cost comparison for a single 3-year lifetime — an inline copy of the
# make.res.table logic defined earlier in this file.
res.table<-
  ldply(c(3),function(life) {
    timef<-seq(0,life*24*365,length.out=50)
    data.frame(life=life*12,hours=timef,
                      hoursPerWeek=timef/(life*52),
                      WorstCaseCloud=cloudCost(timef,1),
                      BestCaseCloud=cloudCost(timef,0),
                      MiddleCaseCloud=cloudCost(timef,0.5),
                      LocalWorkstation=localCost(timef))
  })
# workstation cost as a line; the cloud's best/worst-case range as error bars
ggplot(res.table,
       aes(x=hoursPerWeek))+
  geom_line(aes(y=LocalWorkstation,color="Local Workstation"),size=2)+
  geom_errorbar(aes(ymin=WorstCaseCloud,ymax=BestCaseCloud,color="Cloud"))+
  labs(x="Average Computer Usage (hr/wk)",y="Cost ($)",color="Solution")+
  facet_wrap(~life)+
  theme_bw(25)
plot of chunk unnamed-chunk-7


The figure shows the cost of a cloud based solution as a percentage of the cost of buying a single machine. The values below 1 show the percentage as a number. The panels distinguish the average time to replacement for the machines in months

n.guess<-1e6
utility<-runif(n.guess,min=0,max=1)
serviceYears<-runif(n.guess,min=1.5,max=6.5)
peak<-runif(n.guess,min=0,max=1)
hours<-serviceYears*utility*24*365
scen.table.raw<-data.frame(hours=hours,
                       years=serviceYears,
                       months=serviceYears*12,
                       ryears=round(serviceYears),
                       utility=utility,
                       peak=peak,
                       clcost = cloudCost(hours,peak),
                       wscost = localCost(hours)
                                  )
scen.fcn<-function(c.pts) {
                                             data.frame(
                                               clcost=mean(c.pts$clcost),
                                               wscost=mean(c.pts$wscost)
                                               )
                                             }
scen.table<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,7),
                                           cut_interval(peak,7),
                                           cut_interval(months,4)),cols=3,
                          scen.fcn
                                           )
scen.table$cloudPremium<-with(scen.table,clcost/wscost*100)
ggplot(scen.table,aes(y=utility*100,x=peak*100,fill=cloudPremium))+
  geom_raster()+
  geom_text(aes(label=ifelse(cloudPremium<100,round(cloudPremium),"")))+
  labs(x="Peak Usage (%)",y="Utilization (%)",fill="Cloud Costs\n(% of Workstation)")+
  scale_fill_gradientn(colours=rainbow(4))+
  facet_wrap(~months)+
  theme_bw(10)
plot of chunk unnamed-chunk-8

Cloud: Equal Cost Point

Here the equal cost point is shown where the cloud and local workstations have the same cost. The x-axis is the percentage of resources used at peak-time and the y shows the expected usable lifetime of the computer. The color indicates the utilization percentage and the text on the squares shows this as the numbers of hours used in a week.


scen.table<-ddply.cutcols(scen.table.raw,.(cut_interval(utility,25),
                                           cut_interval(peak,5),
                                           cut_interval(months,5)),cols=3,
                          scen.fcn
                                           )
scen.table.sub<-ddply(scen.table,.(peak,months),function(c.pts) {
  n.pts<-subset(c.pts,clcost<=wscost)
  subset(n.pts,utility==max(n.pts$utility))
})

ggplot(scen.table.sub,aes(100*peak,months,fill=100*utility))+
  geom_raster(alpha=0.65)+
  geom_text(aes(label=round(utility*168)))+
  labs(x="% Peak Time",y="Computer Lifetime\n(Months)",fill="Utilization (%)")+
  scale_fill_gradientn(colours=rainbow(2))+
  theme_bw(20)
plot of chunk unnamed-chunk-9

Course Overview

source('../common/schedule.R')

More Detail

What is an image?

A very abstract definition: A pairing between spatial information (position) and some other kind of information (value).

In most cases this is a 2 dimensional position (x,y coordinates) and a numeric value (intensity)

basic.image<-im.to.df(matrix(round(runif(5*5,0,100)),nrow=5))
names(basic.image)[3]<-"Intensity"
kable(head(basic.image))
x y Intensity
1 1 87
2 1 31
3 1 12
4 1 90
5 1 60
1 2 87

This can then be rearranged from a table form into an array form and displayed as we are used to seeing images

ggplot(basic.image,aes(x,y))+
  geom_tile(fill="yellow",color="black")+
  geom_text(aes(label=paste("(",x,",",y,")\n",Intensity)))+
  coord_equal()+
  theme_bw(15)
plot of chunk unnamed-chunk-12

2D Intensity Images

The next step is to apply a color map (also called lookup table, LUT) to the image so it is a bit more exciting

simple.image<-ggplot(basic.image,aes(x,y,fill=Intensity))+
  geom_tile(color="black")+
  geom_text(aes(label=paste("(",x,",",y,")\n",round(Intensity))))+
  coord_equal()+
  theme_bw(15)

simple.image
plot of chunk unnamed-chunk-13

Which can be arbitrarily defined based on how we would like to visualize the information in the image


simple.image+
  scale_fill_gradientn(colours=rainbow(6))
plot of chunk unnamed-chunk-14

simple.image+
  scale_fill_gradientn(colours=c("black","red","white"),trans="log")
plot of chunk unnamed-chunk-15

Lookup Tables

Formally a lookup table is a function which \[ f(\textrm{Intensity}) \rightarrow \textrm{Color} \]

ggplot(basic.image,aes(x=Intensity,y=Intensity))+
  geom_bar(aes(fill=Intensity),stat="identity",position="dodge",color="black",width=2)+
  scale_fill_gradientn(colours=rainbow(6))+
  labs(y="Color")+
  theme_bw(20)+
  theme(axis.ticks = element_blank(), 
        axis.text.y = element_blank())
plot of chunk unnamed-chunk-16

These transformations can also be non-linear as is the case of the graph below where the mapping between the intensity and the color is a \(\log\) relationship, meaning that the difference between the lower values is much clearer than the higher ones


ggplot(basic.image,aes(x=Intensity,y=log(Intensity)))+
  geom_bar(aes(fill=Intensity),stat="identity",position="dodge",color="black",width=2)+
  scale_fill_gradientn(colours=c("black","red","white"),trans="log")+
  labs(y="Color")+
  theme_bw(20)+
  theme(axis.ticks = element_blank(), 
        axis.text.y = element_blank())
plot of chunk unnamed-chunk-17

On a real image the difference is even clearer

bone.img<-t(readPNG(qbi.file("tiny-bone.png")))
attr(bone.img,"type")<-"grey"
meas.img<-im.to.df(flip(gblur(bone.img,3)))
both.img<-rbind(
    cbind(mutate(meas.img,val=0.9*val+0.4),src="Normal"),
    cbind(mutate(meas.img,val=log10(40*val)),src="Log-transformed")
)
ggplot(both.img,aes(x,y,fill=val))+
  geom_tile()+
  coord_equal()+
  facet_wrap(~src)+
  scale_fill_gradientn(colours=c("black","red","white"))+
  #guides(fill=F)+
  labs(x="",y="")+
  theme_bw(20)+
  theme(axis.ticks = element_blank(), 
        axis.text.x = element_blank(),
        axis.text.y = element_blank())
plot of chunk unnamed-chunk-18

3D Images

For a 3D image, the position or spatial component has a 3rd dimension (z if it is a spatial, or t if it is a movie)

basic.image<-expand.grid(x=1:3,y=1:3,z=1:3)
basic.image$Intensity<-round(runif(nrow(basic.image),0,100))
kable(head(basic.image))
x y z Intensity
1 1 1 25
2 1 1 87
3 1 1 39
1 2 1 60
2 2 1 43
3 2 1 72

This can then be rearranged from a table form into an array form and displayed as a series of slices

ggplot(basic.image,aes(x,y))+
  geom_tile(aes(fill=Intensity),color="black",alpha=0.8)+
  geom_text(aes(label=paste("(",x,",",y,",",z,")\n",Intensity)))+
  facet_grid(z~.)+
  guides(fill=F)+
  scale_fill_gradientn(colours=rainbow(4))+
  theme_bw(15)
plot of chunk unnamed-chunk-20

Multiple Values

In the images thus far, we have had one value per position, but there is no reason there cannot be multiple values. In fact this is what color images are: (red, green, and blue) values, and sometimes even a fourth channel for transparency (alpha). For clarity we call the dimensionality of the image the number of dimensions in the spatial position, and the depth the number in the value.

basic.image<-expand.grid(x=1:5,y=1:5)
basic.image$Intensity<-round(runif(nrow(basic.image),0,100))
basic.image$Transparency<-round(runif(nrow(basic.image),0,100))
kable(head(basic.image))
x y Intensity Transparency
1 1 53 3
2 1 95 94
3 1 64 18
4 1 43 44
5 1 14 35
1 2 37 73

This can then be rearranged from a table form into an array form and displayed as a series of slices

ggplot(basic.image,aes(x,y))+
  geom_tile(aes(fill=Intensity,alpha=Transparency),color="black")+
  geom_text(aes(
    label=paste("(",x,",",y,")\n","I:",Intensity,"\nT:",Transparency)
    ))+
  scale_fill_gradientn(colours=rainbow(4))+
  theme_bw(15)
plot of chunk unnamed-chunk-22

Hyperspectral Imaging

At each point in the image (black dot), instead of having just a single value, there is an entire spectrum. A selected group of these (red dots) are shown to illustrate the variations inside the sample. While certainly much more complicated, this still constitutes an image and requires the same sort of techniques to process correctly.

hyper.path<-qbi.data
map.img<-t(readJpeg(hyper.path("raw.jpg"))[,,3]+readJpeg(hyper.path("raw.jpg"))[,,1])#,1,rev)
map.df<-im.to.df(map.img)
impos<-read.table(hyper.path("impos.csv"),sep=",")
names(impos)<-c("x","y")
ggplot(map.df,aes(x,y))+
  geom_raster(aes(fill=val))+
  #geom_tile(data=impos,fill="red",alpha=0.5)+
  geom_point(data=impos,alpha=0.7)+
  geom_point(data=subset(impos, abs(x-252)<20 & abs(y-293)<20),color="red")+
  labs(fill="White Field\nIntensity")+
  xlim(min(impos$x),max(impos$x))+
  ylim(min(impos$y),max(impos$y))+
  coord_equal()+
  theme_bw(20)#,aes(fill="Scanned"))
plot of chunk load_hypermap


full.df<-read.csv(hyper.path("full_img.csv"))

full.df<-subset(full.df,wavenum<2500)
ggplot(
  subset(full.df, abs(x-252)<20 & abs(y-293)<20),aes(wavenum,val)
  )+
  geom_line()+
  facet_grid(x~y)+
  labs(x=expression(paste("Wavenumber (",cm^-1,")",sep="")),y="Intensity (au)")+
  theme_bw(10)
plot of chunk unnamed-chunk-23

Image Formation

Traditional Imaging

Where do images come from?

# Table of imaging modalities and how each is physically detected.
# NOTE(review): the header mixes one tab with runs of spaces — if columns
# parse wrong, the literal below likely lost its tabs when pasted; verify.
in.table<-read.delim(text="Modality\tImpulse   Characteristic  Response    Detection
Light Microscopy    White Light Electronic interactions Absorption  Film, Camera
Phase Contrast  Coherent light  Electron Density (Index of Refraction)  Phase Shift Phase stepping, holography, Zernike
Confocal Microscopy Laser Light Electronic Transition in Fluorescence Molecule  Absorption and reemission   Pinhole in focal plane, scanning detection
X-Ray Radiography   X-Ray light Photo effect and Compton scattering Absorption and scattering   Scintillator, microscope, camera
Ultrasound  High frequency sound waves  Molecular mobility  Reflection and Scattering   Transducer
MRI Radio-frequency EM  Unmatched Hydrogen spins    Absorption and reemission   RF coils to detect
Atomic Force Microscopy Sharp Point Surface Contact Contact, Repulsion  Deflection of a tiny mirror",header=TRUE)
# caption typo fixed: "recorder" -> "recorded"
kable(in.table,caption="Various modalities and their ways of being recorded")
Modality Impulse Characteristic Response Detection
Light Microscopy White Light Electronic interactions Absorption Film, Camera
Phase Contrast Coherent light Electron Density (Index of Refraction) Phase Shift Phase stepping, holography, Zernike
Confocal Microscopy Laser Light Electronic Transition in Fluorescence Molecule Absorption and reemission Pinhole in focal plane, scanning detection
X-Ray Radiography X-Ray light Photo effect and Compton scattering Absorption and scattering Scintillator, microscope, camera
Ultrasound High frequency sound waves Molecular mobility Reflection and Scattering Transducer
MRI Radio-frequency EM Unmatched Hydrogen spins Absorption and reemission RF coils to detect
Atomic Force Microscopy Sharp Point Surface Contact Contact, Repulsion Deflection of a tiny mirror

Acquiring Images

Traditional / Direct imaging

bone.img<-t(readPNG(qbi.file("tiny-bone.png")))
attr(bone.img,"type")<-"grey"
meas.img<-im.to.df(flip(gblur(bone.img,3)))
both.img<-rbind(
  cbind(meas.img,src="Measured"),
  cbind(im.to.df(bone.img),src="Reconstructed")
  )
ggplot(both.img,aes(x,y,fill=val))+
  geom_tile()+
  coord_equal()+
  facet_wrap(~src)+
  th_fillmap.fn(2)+
  guides(fill=F)+
  labs(x="",y="")+
  theme_bw(20)+
  theme(axis.ticks = element_blank(), 
        axis.text.x = element_blank(),
        axis.text.y = element_blank())
 here the measurement is supposed to be from a typical microscope which blurs, flips and otherwise distorts the image but the original representation is still visible


Indirect / Computational imaging

##
# Reorder a 2-D FFT result so the zero-frequency component moves from
# the corner to the center (MATLAB-style fftshift): swap the matrix
# quadrants by rotating rows and columns by half their length.
fftshift2 <- function(y) {
  half.row <- floor(nrow(y) / 2)
  half.col <- floor(ncol(y) / 2)
  row.order <- c((half.row + 1):nrow(y), 1:half.row)
  col.order <- c((half.col + 1):ncol(y), 1:half.col)
  y[row.order, col.order]
}
scat.img<-im.to.df(abs(fftshift2(fft(bone.img))))
scat.img<-mutate(scat.img,val=log(val)/3.5)
both.img<-rbind(
  cbind(scat.img,src="Measured"),
  cbind(im.to.df(bone.img),src="Reconstructed")
  )
ggplot(both.img,aes(x,y,fill=val))+
  geom_tile()+
  coord_equal()+
  facet_wrap(~src)+
  th_fillmap.fn(2)+
  guides(fill=F)+
  labs(x="",y="")+
  theme_bw(20)+
  theme(axis.ticks = element_blank(), 
        axis.text.x = element_blank(),
        axis.text.y = element_blank())
here the measurement is supposed to be from a diffraction style experiment where the data is measured in reciprocal space (fourier) and can be reconstructed to the original shape

Traditional Imaging

Traditional Imaging

Copyright 2003-2013 J. Konrad in EC520 lecture, reused with permission

Traditional Imaging: Model

Traditional Imaging Model

\[ \left[\left([b(x,y)*s_{ab}(x,y)]\otimes h_{fs}(x,y)\right)*h_{op}(x,y)\right]*h_{det}(x,y)+d_{dark}(x,y) \]

\(s_{ab}\) is the only information you are really interested in, so it is important to remove or correct for the other components

For color (non-monochromatic) images the problem becomes even more complicated \[ \int_{0}^{\infty} {\left[\left([b(x,y,\lambda)*s_{ab}(x,y,\lambda)]\otimes h_{fs}(x,y,\lambda)\right)*h_{op}(x,y,\lambda)\right]*h_{det}(x,y,\lambda)}\mathrm{d}\lambda+d_{dark}(x,y) \]

Indirect Imaging (Computational Imaging)


Surface Topography

On Science

What is the purpose?

How?


Important Points

Inspired by: imagej-pres

Science and Imaging

Reproducibility

Science demands repeatability! and really wants reproducibility


How can we keep track of everything for ourselves and others?

Soup Example

Easy to follow the list, anyone with the right steps can execute and repeat (if not reproduce) the soup

Simple Soup

  1. Buy {carrots, peas, tomatoes} at market
  2. then Buy meat at butcher
  3. then Chop carrots into pieces
  4. then Chop potatoes into pieces
  5. then Heat water
  6. then Wait until boiling then add chopped vegetables
  7. then Wait 5 minutes and add meat

More complicated soup

Here it is harder to follow and you need to carefully keep track of what is being performed

Steps 1-4

  1. then Mix carrots with potatoes \(\rightarrow mix_1\)
  2. then add egg to \(mix_1\) and fry for 20 minutes
  3. then Tenderize meat for 20 minutes
  4. then add tomatoes to meat and cook for 10 minutes \(\rightarrow mix_2\)
  5. then Wait until boiling then add \(mix_1\)
  6. then Wait 5 minutes and add \(mix_2\)

Using flow charts / workflows

Simple Soup

library(igraph)
# The simple soup as a linear workflow: each step points only to the next
node.names<-c("Buy\nvegetables","Buy meat","Chop\nvegetables","Heat water","Wait for\nboiling","Wait 5\nadd meat")
c.mat<-matrix(0,length(node.names),length(node.names))
# adjacency matrix for the chain i -> i+1
for(i in c(1:(length(node.names)-1))) c.mat[i,i+1]<-1
colnames(c.mat)<-node.names
rownames(c.mat)<-node.names
g<-graph.adjacency(c.mat,mode="directed")
V(g)$degree <- degree(g)
V(g)$label <- V(g)$name
V(g)$color <- "lightblue"
V(g)$size<-80
E(g)$width<-2

E(g)$color<-"black"

plot(g,layout=layout.circle)#,  layout= layout.kamada.kawai) ##
plot of chunk unnamed-chunk-27


Complicated Soup

# The complicated soup: start from the linear chain and rewire edges.
# Node indices: 1 buy veg, 2 buy meat, 3 chop veg, 4 heat water,
# 5 wait for boiling, 6 wait 5 add meat, 7 mix carrots/potatoes,
# 8 add egg and fry, 9 tenderize meat, 10 add tomatoes, 11 wait 5 min
new.nodes<-c(node.names,"Mix carrots\npotatoes","add egg\nand fry","tenderize\nmeat",
             "add tomatoes\ncook 10min","wait 5 minutes")
c.mat<-matrix(0,length(new.nodes),length(new.nodes))
# old connections
for(i in c(1:(length(new.nodes)-1))) c.mat[i,i+1]<-1
colnames(c.mat)<-new.nodes
rownames(c.mat)<-new.nodes
# buy vegetables -> chop vegetables (not buy meat)
c.mat[1,2]<-0
c.mat[1,3]<-1
# buy meat -> tenderize meat
c.mat[2,3]<-0
c.mat[2,9]<-1
# wait-5-add-meat -> wait 5 minutes
c.mat[6,7]<-0
c.mat[6,11]<-1
# chop vegies
c.mat[3,]<-0
c.mat[3,7]<-1

# add egg and fry -> wait for boiling
c.mat[8,]<-0
c.mat[8,5]<-1

g<-graph.adjacency(c.mat,mode="directed")
V(g)$degree <- degree(g)
V(g)$label <- V(g)$name
V(g)$color <- "lightblue"
V(g)$size<-40
E(g)$width<-2

E(g)$color<-"black"

plot(g,layout=layout.circle)#,  layout= layout.kamada.kawai) ##
plot of chunk unnamed-chunk-28

Workflows

Clearly a linear set of instructions is ill-suited for even a fairly easy soup; it is even more difficult when there are dozens of steps and different pathways

plot(g,layout= layout.sugiyama(g)$layout)
plot of chunk unnamed-chunk-29


Furthermore a clean workflow allows you to better parallelize the task since it is clear which tasks can be performed independently

V(g)[c(1,3,7,8)]$color <- "red"
V(g)[c(4)]$color <- "green"
V(g)[c(2,9,10)]$color <- "yellow"
plot(g,layout= layout.sugiyama(g)$layout)
plot of chunk unnamed-chunk-30